package com.bw.test4;

import org.apache.flink.types.Row;

import com.alibaba.alink.operator.batch.BatchOperator;
import com.alibaba.alink.operator.batch.source.MemSourceBatchOp;
import com.alibaba.alink.pipeline.clustering.KMeans;
import org.junit.Test;

import java.util.Arrays;
import java.util.List;

/**
 * Smoke test that runs the {@link KMeans} pipeline stage on a small in-memory data set.
 */
public class KMeansTest {
	/**
	 * Builds a six-row in-memory source, fits a {@link KMeans} estimator on it, applies the
	 * fitted result to the same source and prints the output.
	 *
	 * <p>The test makes no assertions; it only fails if one of the calls below throws.
	 *
	 * @throws Exception declared so that any checked exception raised by the calls below
	 *                   fails the test
	 */
	@Test
	public void testKMeans() throws Exception {
		BatchOperator.setParallelism(1);

		// Sample data: one row per point, as (id, vector encoded as a string).
		// NOTE(review): ids 1 and 2 separate the components with commas while the other rows
		// use spaces -- confirm the vector parser accepts both forms.
		List <Row> samples = Arrays.asList(
			Row.of(0, "0 0 0"),
			Row.of(1, "0.1,0.1,0.1"),
			Row.of(2, "0.2,0.2,0.2"),
			Row.of(3, "9 9 9"),
			Row.of(4, "9.1 9.1 9.1"),
			Row.of(5, "9.2 9.2 9.2")
		);
		BatchOperator <?> source = new MemSourceBatchOp(samples, "id int, vec string");

		// Clustering stage: read vectors from "vec" and write the prediction to "pred".
		// NOTE(review): the sample values sit in two groups (near 0 and near 9) but k is 3 --
		// confirm this is intended.
		KMeans clusterer = new KMeans()
			.setK(3) // number of clusters to split the data into
			.setVectorCol("vec")
			.setPredictionCol("pred");

		// Fit on the source, transform the same source, and print the result.
		clusterer.fit(source).transform(source).print();
	}
}
